Zanzibar small-scale fishery

Preliminary analyses

Author

Lorenzo Longobardi

1. Aim

This report presents preliminary analyses of Zanzibar’s small-scale fisheries, including gear usage, vessel types, fishing effort, catch rates, and revenue across different districts.

Show code
# Location of the survey export inside the package tree. The imports below do
# not read this variable; they use file names relative to the working directory.
path <- "inst/reports/zanizbarSSF_clean__20250414093746_da3c69d__.parquet"

# Survey-level table (read later for `district`, `cpue` and `rpue`)
dat <- dplyr::as_tibble(
  rio::import("zanizbarSSF_clean__20250414093746_da3c69d__.parquet")
)

# Fishery statistics table (read later for gear, vessel type and effort columns)
stats <- dplyr::as_tibble(
  rio::import("fishery_stats__20250417160421_07d4dba__.parquet")
)

# Aggregated catch by fish group
taxa <- dplyr::as_tibble(
  rio::import("aggregated_taxa__20250427171641_ab5e96e__.parquet")
)


if (FALSE) {
  # Disabled chunk kept for reference; the `if (FALSE)` guard means it never
  # runs. It would build one row per submission with readable gear and vessel
  # labels from `validated_surveys`.

  # Gear code -> display label
  gear_labels <- c(
    HL = "Hand line",
    BS = "Beach Seine",
    CS = "Cast Net",
    GN = "Gill Net",
    LL = "Long line",
    SP = "Spear gun",
    SR = "Stick Rod",
    PS = "Purse Seine",
    RN = "Ring Net",
    SN = "Shark Net",
    TR = "Trap"
  )

  # Vessel code -> display label
  vessel_labels <- c(
    "1" = "Fibre Boat",
    "2" = "Dhow",
    "3" = "Dugout canoe",
    "4" = "Outrigger canoe",
    "5" = "Planked Boat",
    "6" = "Wooden Boat"
  )

  we <-
    validated_surveys |>
    dplyr::mutate(
      n_fishers = .data$no_men_fishers + .data$no_women_fishers + .data$no_child_fishers
    ) |>
    dplyr::select(submission_id, district, trip_duration, gear, vessel_type, n_fishers) |>
    dplyr::group_by(submission_id) |>
    # Keep the first value of every column within each submission
    dplyr::summarise(dplyr::across(dplyr::everything(), dplyr::first)) |>
    dplyr::mutate(
      # Codes without a label (and missing codes) keep their original value
      gear = dplyr::coalesce(
        unname(gear_labels)[match(.data$gear, names(gear_labels))],
        .data$gear
      ),
      vessel_type = dplyr::coalesce(
        unname(vessel_labels)[match(.data$vessel_type, names(vessel_labels))],
        .data$vessel_type
      )
    )
}

# Fixed colour per district, used by the manual colour and fill scales below
district_colors <- c(
  urban       = "#1E88E5", # Blue
  west_a      = "#FF5722", # Orange
  central     = "#43A047", # Green
  west_b      = "#FFC107", # Yellow
  north_a     = "#9C27B0", # Purple
  wete        = "#F44336", # Red
  mkoani      = "#009688", # Teal
  north_b     = "#795548", # Brown
  chake_chake = "#607D8B", # Blue Grey
  south       = "#3F51B5", # Indigo
  micheweni   = "#E91E63"  # Pink
)

# Discrete y scale for axes built with `reorder_within()`: the "___<within>"
# suffix is removed from every label before display. Extra arguments are
# passed on to `scale_y_discrete()`.
scale_y_reordered <- function(...) {
  strip_within_suffix <- function(axis_labels) {
    gsub("___.*", "", axis_labels)
  }
  scale_y_discrete(labels = strip_within_suffix, ...)
}

# Reorder the values of `x` separately inside each level of `within`.
#
# Each value is tagged with its `within` value ("<x><sep><within>") so the same
# `x` can take a different position in different facets; the tagged values are
# then ordered by `fun(by)`. Pair with `scale_y_reordered()` to strip the tag
# from the axis labels.
#
# x      Values to reorder.
# by     Numeric vector the ordering is computed from.
# within Grouping values pasted onto `x` (e.g. the facet variable).
# fun    Summary applied to `by` for each tagged value (default `mean`).
# sep    Separator placed between `x` and `within`.
# ...    Forwarded to `stats::reorder()`, which hands them to `fun`
#        (e.g. `na.rm = TRUE`).
#
# Returns a factor of the tagged values with levels ordered by `fun(by)`.
reorder_within <- function(x, by, within, fun = mean, sep = "___", ...) {
  new_x <- paste(x, within, sep = sep)
  stats::reorder(new_x, by, FUN = fun, ...)
}
# Shared report theme: `theme_minimal()` with white backgrounds, horizontal
# grid lines only, a top legend without title, and a markdown-capable caption.
# Takes no arguments and returns a theme object to add to a plot.
theme_modern <- function() {
  theme_minimal() +
    theme(
      # Text elements
      plot.title = element_text(size = 18, face = "bold", margin = margin(b = 10)),
      plot.subtitle = element_text(size = 12, color = "#555555", margin = margin(b = 20)),
      plot.caption = element_markdown(size = 8, color = "#888888", margin = margin(t = 15)),

      # Axis formatting
      axis.title = element_text(size = 10, color = "#555555", face = "bold"),
      axis.text = element_text(size = 9, color = "#333333"),
      axis.text.x = element_text(angle = 0),
      axis.title.x = element_text(margin = margin(t = 10)),
      axis.title.y = element_text(margin = margin(r = 10)),

      # Grid lines: keep only the horizontal major lines.
      # `linewidth` replaces the `size` argument deprecated for line elements.
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.grid.major.y = element_line(color = "#E0E0E0", linewidth = 0.3),

      # Legend
      legend.position = "top",
      legend.title = element_blank(),
      legend.text = element_text(size = 10),
      legend.background = element_rect(fill = "white", color = NA),
      legend.key.size = unit(1, "cm"),

      # Plot background
      plot.background = element_rect(fill = "white", color = NA),
      panel.background = element_rect(fill = "white", color = NA),

      # Margins
      plot.margin = margin(20, 25, 20, 25)
    )
}

Fishery characterization

Gear usage

Show code
# Share of each gear type within every district
gear_viz_grouped <-
  stats |>
  dplyr::count(district, gear) |>
  dplyr::group_by(district) |>
  dplyr::mutate(
    total = sum(n),
    prc = n / total * 100
  ) |>
  dplyr::arrange(dplyr::desc(prc), .by_group = TRUE) |>
  dplyr::mutate(prc_label = paste0(round(prc, 1), "%")) |>
  # Keep only districts with more than one record in total
  dplyr::filter(total > 1)

# Lollipop chart: one facet per district, gears ordered by share within facet
ggplot(gear_viz_grouped, aes(x = prc, y = reorder_within(gear, prc, district))) +
  # Add dots, sized by percentage
  geom_point(aes(size = prc, color = district), alpha = 0.8) +

  # Add connecting segments (line thickness is set with `linewidth`, the
  # replacement for the deprecated `size` on line geoms)
  geom_segment(aes(xend = 0, yend = reorder_within(gear, prc, district), color = district),
    alpha = 0.3, linewidth = 0.8
  ) +

  # Add percentage labels
  geom_text(aes(label = prc_label, color = district),
    hjust = -0.5,
    size = 3,
    show.legend = FALSE
  ) +

  # Set size and colour scales
  scale_size_continuous(range = c(2, 8)) +
  scale_color_manual(values = district_colors) +

  # Strip the district suffix added by reorder_within() from the y labels
  scale_y_reordered() +

  # Set x-axis scale; the upper limit leaves 20% headroom for the text labels
  scale_x_continuous(
    labels = function(x) paste0(x, "%"),
    limits = c(0, max(gear_viz_grouped$prc) * 1.2),
    expand = c(0, 0)
  ) +

  # Add informative labels
  labs(
    title = "Fishing Gear Usage in Zanzibar",
    subtitle = "Gear type distribution by district (percentage of total usage)",
    x = "Gear usage",
    y = "",
    caption = "Source: PESKAS"
  ) +

  # Apply the custom theme
  theme_modern() +
  # Facet by district, each with its own y axis
  facet_wrap(~district, ncol = 2, scales = "free_y") +

  # Hide the size and colour legends
  guides(
    size = "none",
    color = "none"
  )

Vessel type

Show code
# Share of each vessel type within every district. The result reuses the
# `gear_viz_grouped` name from the gear chunk and overwrites it.
gear_viz_grouped <-
  stats |>
  # Missing vessel types are shown as their own "Unknown" category
  dplyr::mutate(vessel_type = dplyr::if_else(is.na(vessel_type), "Unknown", vessel_type)) |>
  dplyr::count(district, vessel_type) |>
  dplyr::group_by(district) |>
  dplyr::mutate(
    total = sum(n),
    prc = n / total * 100
  ) |>
  dplyr::arrange(dplyr::desc(prc), .by_group = TRUE) |>
  dplyr::mutate(prc_label = paste0(round(prc, 1), "%")) |>
  # Keep only districts with more than one record in total
  dplyr::filter(total > 1)

# Lollipop chart: one facet per district, vessel types ordered by share
ggplot(gear_viz_grouped, aes(x = prc, y = reorder_within(vessel_type, prc, district))) +
  # Add dots, sized by percentage
  geom_point(aes(size = prc, color = district), alpha = 0.8) +

  # Add connecting segments (line thickness is set with `linewidth`, the
  # replacement for the deprecated `size` on line geoms)
  geom_segment(aes(xend = 0, yend = reorder_within(vessel_type, prc, district), color = district),
    alpha = 0.3, linewidth = 0.8
  ) +

  # Add percentage labels
  geom_text(aes(label = prc_label, color = district),
    hjust = -0.5,
    size = 3,
    show.legend = FALSE
  ) +

  # Set colour and size scales
  scale_color_manual(values = district_colors) +
  scale_size_continuous(range = c(2, 8)) +

  # Strip the district suffix added by reorder_within() from the y labels
  scale_y_reordered() +

  # Set x-axis scale; the upper limit leaves 20% headroom for the text labels
  scale_x_continuous(
    labels = function(x) paste0(x, "%"),
    limits = c(0, max(gear_viz_grouped$prc) * 1.2),
    expand = c(0, 0)
  ) +

  # Add informative labels
  labs(
    title = "Vessel Types in Zanzibar",
    subtitle = "Vessel type distribution by district (percentage of total usage)",
    x = "Vessel type usage",
    y = "",
    caption = "Source: PESKAS"
  ) +

  # Apply the custom theme, then switch the legend off.
  # `legend.position` is the theme element for this; `legend` and `color` are
  # not theme elements.
  theme_modern() +
  theme(legend.position = "none") +

  # Facet by district, each with its own y axis
  facet_wrap(~district, ncol = 2, scales = "free_y") +

  # Hide the size and colour legends
  guides(
    size = "none",
    color = "none"
  )

Fishing effort

Show code
fisher_data <- stats

# Calculate summary statistics by district: crew size, trip duration and the
# number of rows each district contributes
district_summary <- fisher_data |>
  dplyr::group_by(district) |>
  dplyr::summarise(
    mean_fishers = mean(n_fishers, na.rm = TRUE),
    median_fishers = median(n_fishers, na.rm = TRUE),
    mean_duration = mean(trip_duration, na.rm = TRUE),
    median_duration = median(trip_duration, na.rm = TRUE),
    n_trips = dplyr::n()
  ) |>
  dplyr::ungroup()

# Top panel: mean number of fishers per district, bars ordered by that mean
p1 <- ggplot(district_summary, aes(x = reorder(district, mean_fishers), y = mean_fishers, fill = district)) +
  geom_col(width = 0.7) +
  # Rounded mean printed just above each bar
  geom_text(aes(label = round(mean_fishers, 0), y = mean_fishers + 0.5),
    vjust = 0, size = 3.5
  ) +
  # Row count written vertically in white near the base of each bar
  geom_text(aes(label = sprintf("n=%d", n_trips), y = 0.5),
    vjust = 0, hjust = 0, angle = 90, size = 3, color = "white"
  ) +
  labs(
    title = "Average Number of fishers by District",
    subtitle = sprintf("Based on %d fishing trips", sum(district_summary$n_trips)),
    x = "",
    y = "Average N. of fishers"
  ) +
  scale_fill_manual(values = district_colors) +
  theme_modern() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Bottom panel: mean trip duration per district, bars ordered by that mean
p2 <- ggplot(district_summary, aes(x = reorder(district, mean_duration), y = mean_duration, fill = district)) +
  geom_col(width = 0.7) +
  # Mean duration with one decimal printed just above each bar
  geom_text(aes(label = sprintf("%.1f h", mean_duration), y = mean_duration + 0.5),
    vjust = 0, size = 3.5
  ) +
  # Row count written vertically in white near the base of each bar
  geom_text(aes(label = sprintf("n=%d", n_trips), y = 0.5),
    vjust = 0, hjust = 0, angle = 90, size = 3, color = "white"
  ) +
  labs(
    title = "Average Fishing Trip Duration by District",
    subtitle = sprintf("Based on %d fishing trips", sum(district_summary$n_trips)),
    x = "",
    y = "Average Trip Duration (hours)"
  ) +
  scale_fill_manual(values = district_colors) +
  theme_modern() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Stack the panels, giving the top one twice the height of the bottom one
combined_plot <- p1 / p2 + patchwork::plot_layout(heights = c(2, 1))

# Display the combined plot
combined_plot

Catch

Show code
# District-level CPUE/RPUE summary, highest mean CPUE first; the "north_a"
# row is removed from the summary
district_summary <- dat |>
  dplyr::select(district, cpue, rpue) |>
  dplyr::mutate(rpue_usd = rpue * 0.00037) |>
  dplyr::group_by(district) |>
  dplyr::summarise(
    mean_cpue = mean(cpue, na.rm = TRUE),
    median_cpue = median(cpue, na.rm = TRUE),
    mean_rpue_usd = mean(rpue_usd, na.rm = TRUE),
    median_rpue_usd = median(rpue_usd, na.rm = TRUE),
    n_samples = dplyr::n(),
    .groups = "drop"
  ) |>
  dplyr::arrange(dplyr::desc(mean_cpue)) |>
  dplyr::filter(district != "north_a")

# District order used for the factor levels in the distribution plot
ordered_districts <- district_summary$district

# Row-level data with districts as a factor in that order; districts outside
# the levels become NA and are dropped by the filter
dat_ordered <- dat |>
  dplyr::mutate(district = factor(district, levels = ordered_districts)) |>
  dplyr::filter(district != "north_a")

# Left panel: CPUE distribution per district (violin with a narrow boxplot)
p1 <- ggplot(dat_ordered, aes(x = district, y = cpue, fill = district)) +
  geom_violin(alpha = 0.7, trim = TRUE, scale = "width") +
  geom_boxplot(width = 0.15, alpha = 0.7, outlier.shape = NA) +
  # Sample-size labels placed below zero, taken from the summary table
  geom_text(
    data = district_summary,
    aes(x = district, y = -0.3, label = sprintf("n=%d", n_samples)),
    size = 2.5
  ) +
  scale_fill_manual(values = district_colors) +
  # Lower limit below zero leaves room for the sample-size labels
  scale_y_continuous(limits = c(-0.5, NA)) +
  labs(
    title = "Catch per Unit Effort by District",
    x = "",
    y = "CPUE (kg/fisher/hour)"
  ) +
  # Theme tweaks must follow theme_modern() so they are not overwritten
  theme_modern() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8)
  )

# Right panel: horizontal bars of mean CPUE, ordered by that mean
p2 <- ggplot(district_summary, aes(x = reorder(district, mean_cpue), y = mean_cpue, fill = district)) +
  geom_col() +
  # Mean value printed next to each bar
  geom_text(
    aes(label = sprintf("%.2f", mean_cpue)),
    hjust = -0.3, size = 3
  ) +
  scale_fill_manual(values = district_colors) +
  # Additive expansion on one side of the discrete axis
  scale_x_discrete(expand = expansion(add = c(0, 2))) +
  labs(
    title = "Average CPUE by District",
    x = "",
    y = "kg/fisher/hour"
  ) +
  coord_flip() +
  # Theme tweaks must follow theme_modern() so they are not overwritten
  theme_modern() +
  theme(
    legend.position = "none",
    panel.grid.major.y = element_blank()
  )

# Side by side, with the distribution panel 1.5 times as wide as the bar panel
p1 + p2 + patchwork::plot_layout(widths = c(1.5, 1))

Revenue

Show code
# Factor applied to `rpue` to express it in USD, defined once for this chunk.
# NOTE(review): presumably a fixed local-currency-to-USD exchange rate;
# confirm its source and reference date.
usd_per_rpue_unit <- 0.00037

# District-level CPUE/RPUE summary; the "north_a" row is removed
district_summary <- dat |>
  dplyr::select(district, cpue, rpue) |>
  dplyr::mutate(rpue_usd = rpue * usd_per_rpue_unit) |>
  dplyr::group_by(district) |>
  dplyr::summarise(
    mean_cpue = mean(cpue, na.rm = TRUE),
    median_cpue = median(cpue, na.rm = TRUE),
    mean_rpue_usd = mean(rpue_usd, na.rm = TRUE),
    median_rpue_usd = median(rpue_usd, na.rm = TRUE),
    n_samples = dplyr::n()
  ) |>
  dplyr::ungroup() |>
  # Sort by average RPUE (USD) for consistent ordering
  dplyr::arrange(dplyr::desc(mean_rpue_usd)) |>
  dplyr::filter(district != "north_a")

# Create the ordered district vector to ensure consistent ordering
ordered_districts <- district_summary$district

# Make the ordered version of the data; districts outside the levels become
# NA and are dropped by the filter
dat_ordered <- dat |>
  dplyr::mutate(district = factor(district, levels = ordered_districts)) |>
  dplyr::filter(district != "north_a")

# Left panel: RPUE distribution per district, converted to USD
p1 <- ggplot(dat_ordered, aes(x = district, y = rpue * usd_per_rpue_unit, fill = district)) +
  geom_violin(alpha = 0.7, trim = TRUE, scale = "width") +
  geom_boxplot(width = 0.15, alpha = 0.7, outlier.shape = NA) +
  scale_fill_manual(values = district_colors) +
  labs(
    title = "Revenue per Unit Effort by District",
    x = "",
    y = "RPUE (USD/fisher/hour)"
  ) +
  # Apply the shared theme
  theme_modern() +
  # Add modifications AFTER applying the theme
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8)
  ) +
  # Adjust y-axis limits to make room for labels at the bottom
  scale_y_continuous(limits = c(-0.5, NA), n.breaks = 10) +
  # Add sample size labels with adjusted y-position
  geom_text(
    data = district_summary,
    aes(
      x = district, y = -0.3, # Position below 0
      label = sprintf("n=%d", n_samples)
    ),
    size = 2.5
  )

# Right panel: horizontal bars of mean RPUE (USD), ordered by that mean
p2 <- ggplot(district_summary, aes(x = reorder(district, mean_rpue_usd), y = mean_rpue_usd, fill = district)) +
  geom_col() +
  scale_fill_manual(values = district_colors) +
  # Additive expansion on one side of the discrete axis
  scale_x_discrete(expand = expansion(add = c(0, 2))) +
  coord_flip() +
  labs(
    title = "RPUE by District",
    x = "",
    y = "USD/fisher/hour"
  ) +
  # Apply the shared theme
  theme_modern() +
  # Add modifications AFTER applying the theme
  theme(
    legend.position = "none",
    panel.grid.major.y = element_blank()
  ) +
  # Add text with adjusted position
  geom_text(aes(label = sprintf("%.2f", mean_rpue_usd)),
    hjust = -0.3, size = 3
  )

# Side by side, with the distribution panel 1.5 times as wide as the bar panel
p1 + p2 + patchwork::plot_layout(widths = c(1.5, 1))

Fish groups

Show code
# Overall catch composition: the five fish groups with the largest total catch
# keep their names, every other group is pooled into "Others"
taxa_overall <- taxa |>
  dplyr::filter(!is.na(fish_group)) |>
  # Total catch weight per fish group across all districts
  dplyr::group_by(fish_group) |>
  dplyr::summarise(tot_catch_kg = sum(tot_catch_kg), .groups = "drop") |>
  # Share of the overall catch, in percent
  dplyr::mutate(prc = tot_catch_kg / sum(tot_catch_kg) * 100) |>
  # Largest groups first, so the row position is the rank
  dplyr::arrange(dplyr::desc(tot_catch_kg)) |>
  dplyr::mutate(
    fish_category = dplyr::if_else(dplyr::row_number() <= 5, fish_group, "Others")
  ) |>
  # Collapse the pooled groups into a single "Others" row
  dplyr::group_by(fish_category) |>
  dplyr::summarise(
    tot_catch_kg = sum(tot_catch_kg),
    prc = sum(prc),
    .groups = "drop"
  ) |>
  dplyr::arrange(dplyr::desc(tot_catch_kg)) |>
  # Two-line slice label: category name over its percentage
  dplyr::mutate(label = paste0(fish_category, "\n", round(prc, 1), "%"))

# Pie chart: a single stacked bar drawn in polar coordinates
ggplot(taxa_overall, aes(x = "", y = prc, fill = fish_category)) +
  geom_col(width = 1) +
  # Labels centred inside each slice
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),
    size = 3.5
  ) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Overall Fish Group Composition in Zanzibar",
    subtitle = "Top 5 fish groups across all districts",
    caption = "Source: PESKAS"
  ) +
  # Start from an empty theme, then restore title, subtitle and caption styling
  theme_void() +
  theme(
    plot.title = element_text(size = 18, face = "bold", margin = margin(b = 10)),
    plot.subtitle = element_text(size = 12, color = "#555555", margin = margin(b = 20)),
    plot.caption = element_markdown(size = 8, color = "#888888", margin = margin(t = 15)),
    legend.position = "none",
    plot.margin = margin(20, 25, 20, 25)
  )

Show code
# Catch composition per district: within each district the five fish groups
# with the largest catch keep their names, the rest are pooled into "Others"
taxa_processed <-
  taxa |>
  dplyr::filter(!is.na(fish_group)) |>
  dplyr::group_by(district) |>
  # Largest catch first within each district, so the row position is the rank
  dplyr::arrange(district, dplyr::desc(tot_catch_kg)) |>
  dplyr::mutate(
    fish_category = dplyr::if_else(dplyr::row_number() <= 5, fish_group, "Others")
  ) |>
  # Combine all pooled groups into one "Others" row per district
  dplyr::group_by(district, fish_category) |>
  dplyr::summarise(
    tot_catch_kg = sum(tot_catch_kg),
    prc = sum(prc),
    .groups = "drop"
  ) |>
  # Factor with "Others" as the last level regardless of its percentage
  dplyr::mutate(
    fish_category = factor(
      fish_category,
      levels = c(setdiff(unique(fish_category), "Others"), "Others")
    )
  ) |>
  # Arrange by district, named groups before "Others", then by catch
  dplyr::arrange(
    district,
    dplyr::desc(fish_category != "Others"),
    dplyr::desc(tot_catch_kg)
  )


# Lollipop chart: one facet per district. "Others" is ordered with -Inf so it
# always sits at the bottom of its facet.
ggplot(taxa_processed, aes(x = prc, y = reorder_within(fish_category,
                                                      ifelse(fish_category == "Others", -Inf, prc),
                                                      district))) +
  # Add dots, sized by percentage
  geom_point(aes(size = prc, color = district), alpha = 0.8) +

  # Add connecting segments (line thickness is set with `linewidth`, the
  # replacement for the deprecated `size` on line geoms)
  geom_segment(aes(xend = 0,
                   yend = reorder_within(fish_category,
                                       ifelse(fish_category == "Others", -Inf, prc),
                                       district),
                   color = district),
    alpha = 0.3, linewidth = 0.8
  ) +

  # Add percentage labels
  geom_text(aes(label = paste0(round(prc, 1), "%"), color = district),
    hjust = -0.5,
    size = 3,
    show.legend = FALSE
  ) +

  # Set size and colour scales
  scale_size_continuous(range = c(2, 8)) +
  scale_color_manual(values = district_colors) +

  # Strip the district suffix added by reorder_within() from the y labels
  scale_y_reordered() +

  # Set x-axis scale; the upper limit leaves 20% headroom for the text labels
  scale_x_continuous(
    labels = function(x) paste0(x, "%"),
    limits = c(0, max(taxa_processed$prc) * 1.2),
    expand = c(0, 0)
  ) +

  # Add informative labels
  labs(
    title = "Fish Groups Composition in Zanzibar",
    subtitle = "Top 5 fish groups by district (percentage of total catch)",
    x = "Percentage of catch",
    y = "",
    caption = "Source: PESKAS"
  ) +

  # Apply the custom theme
  theme_modern() +
  # Facet by district, each with its own y axis
  facet_wrap(~district, ncol = 2, scales = "free_y") +

  # Hide the size and colour legends
  guides(
    size = "none",
    color = "none"
  )